6 Microbial metagenomics

6.1 Microbial DNA fraction

6.1.1 Data overview

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(mean=mean(singlem_fraction, na.rm=T),
              sd=sd(singlem_fraction, na.rm=T),
              median=median(singlem_fraction, na.rm = TRUE),
              IQR=IQR(singlem_fraction, na.rm = TRUE)) %>% 
    tt()
tinytable_gm44sjvgzi7l4g92n38b
sample_type mean sd median IQR
Anal/cloacal swab 0.2598510 0.3407707 0.0022 0.574700
Faecal 0.5203527 0.2759433 0.6270 0.350875
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(mean=mean(singlem_fraction, na.rm=T),
              sd=sd(singlem_fraction, na.rm=T),
              median=median(singlem_fraction, na.rm = TRUE),
              IQR=IQR(singlem_fraction, na.rm = TRUE)) %>% 
    tt()
tinytable_k8hhs4rt83yi7ozzcduo
tax_group mean sd median IQR
Amphibians 0.5807892 0.1799217 0.62900 0.128200
Bats 0.2903304 0.2949997 0.20650 0.523400
Birds 0.2777113 0.3360460 0.07850 0.556025
Mammals 0.5871579 0.2841097 0.67420 0.175250
Reptiles 0.5885336 0.1701545 0.63475 0.114050

6.1.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(!is.na(singlem_fraction))%>%
    mutate(singlem_fraction=case_when(singlem_fraction>1~1,
                                      singlem_fraction<=1~singlem_fraction))%>%
    glm(singlem_fraction ~  tax_group + sample_type, data = .,family=quasibinomial)  %>%
    Anova(.,test.statistic = "F",type = "III") %>%
    tidy()%>%
    tt()
tinytable_2xes4n18mt1q3vec3ujo
term sumsq df F.values p.value
tax_group 148.8863 4 119.1247 2.466079e-91
sample_type 51.7346 1 165.5725 1.784789e-36
Residuals 630.8543 2019 NA NA
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(!is.na(singlem_fraction))%>%
    mutate(singlem_fraction=case_when(singlem_fraction>1~1,
                                      singlem_fraction<=1~singlem_fraction),
           sample_type=factor(sample_type),
           tax_group=factor(tax_group))%>%
    glm(singlem_fraction ~  tax_group + sample_type, data = .,family=quasibinomial)  %>%
    glht(.,linfct = mcp(tax_group = "Tukey"))%>%
    summary()%>%
    tidy()%>%
    tt()
tinytable_qogrqmedep9exf19luh2
term contrast null.value estimate std.error statistic adj.p.value
tax_group Bats - Amphibians 0 -1.25097357 0.10967066 -11.4066383 0.000000e+00
tax_group Birds - Amphibians 0 -0.88809563 0.11334382 -7.8354131 2.153833e-14
tax_group Mammals - Amphibians 0 0.16650972 0.10026122 1.6607589 4.506758e-01
tax_group Reptiles - Amphibians 0 0.03298124 0.10207377 0.3231118 9.975622e-01
tax_group Birds - Bats 0 0.36287794 0.09724522 3.7315759 1.839021e-03
tax_group Mammals - Bats 0 1.41748328 0.08113495 17.4706869 0.000000e+00
tax_group Reptiles - Bats 0 1.28395481 0.08304328 15.4612727 0.000000e+00
tax_group Mammals - Birds 0 1.05460535 0.08202966 12.8563901 0.000000e+00
tax_group Reptiles - Birds 0 0.92107687 0.08779908 10.4907351 0.000000e+00
tax_group Reptiles - Mammals 0 -0.13352848 0.07012718 -1.9040901 3.080455e-01

6.1.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=singlem_fraction, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0, 1) +
        geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        facet_grid(~sample_type) +
        labs(y="Host percentage", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/microbialdata_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=singlem_fraction, x=sample_type, group=sample_type)) +
        stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
        theme_classic() +
        labs(y="Host percentage", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/microbialdata_taxa_all.pdf",width=9, height=4, units="in")

6.2 Domain-adjusted mapping rate

6.2.1 Data summary

left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
    filter(singlem_fraction>0) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(mean=mean(damr, na.rm=T),
              sd=sd(damr, na.rm=T),
              median=median(damr, na.rm = TRUE),
              IQR=IQR(damr, na.rm = TRUE)) %>% 
    tt()
tinytable_1el0867yw03upylm44rp
sample_type mean sd median IQR
Anal/cloacal swab 72.84533 22.25244 76.29903 30.61081
Faecal 86.58154 25.00932 100.00000 15.61039
left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
    filter(singlem_fraction>0) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(mean=mean(damr, na.rm=T),
              sd=sd(damr, na.rm=T),
              median=median(damr, na.rm = TRUE),
              IQR=IQR(damr, na.rm = TRUE)) %>% 
    tt()
tinytable_0k6tlyq45z92zb0mcpjf
tax_group mean sd median IQR
Amphibians 89.19356 24.84575 100.00000 5.927094
Bats 80.94051 24.67483 91.76713 30.814236
Birds 67.03815 24.99000 72.18568 29.811183
Mammals 91.06092 16.77450 100.00000 11.191039
Reptiles 84.52325 31.14150 100.00000 10.748612

6.2.2 Statistical test

left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,11,singlem_fraction)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,1,mapping_percentage/singlem_fraction)) %>%
    filter(singlem_fraction>0) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(!is.na(damr)) %>%
    glm(damr ~ sample_type + tax_group, data = .,family=quasibinomial)  %>%
    Anova(test.statistic = "F",type="III") %>%
    tidy()%>%
    tt()
tinytable_8612kpvx1xg3plhcngvc
term sumsq df F.values p.value
sample_type 13.49334 1 19.83706 8.794973e-06
tax_group 76.64409 4 28.16933 6.244313e-23
Residuals 1742.01471 2561 NA NA
left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,11,singlem_fraction)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,1,mapping_percentage/singlem_fraction)) %>%
    filter(singlem_fraction>0) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(!is.na(damr)) %>%
    mutate(sample_type=factor(sample_type),
           tax_group=factor(tax_group))%>%
    glm(damr ~ sample_type + tax_group, data = .,family=quasibinomial) %>%
    glht(.,linfct = mcp(tax_group = "Tukey"))%>%
    summary()%>%
    tidy()%>%
    tt()
tinytable_nhcxwh1nae3urq2hser6
term contrast null.value estimate std.error statistic adj.p.value
tax_group Bats - Amphibians 0 -1.5736540 1.1220474 -1.4024844 5.926753e-01
tax_group Birds - Amphibians 0 -2.7097758 1.0700747 -2.5323240 7.222106e-02
tax_group Mammals - Amphibians 0 0.3210380 1.1686087 0.2747181 9.985222e-01
tax_group Reptiles - Amphibians 0 -3.2127466 1.0255111 -3.1328247 1.271399e-02
tax_group Birds - Bats 0 -1.1361218 0.5793468 -1.9610391 2.562036e-01
tax_group Mammals - Bats 0 1.8946920 0.7458078 2.5404560 7.042929e-02
tax_group Reptiles - Bats 0 -1.6390926 0.4921951 -3.3301684 6.757354e-03
tax_group Mammals - Birds 0 3.0308138 0.6650554 4.5572350 3.425588e-05
tax_group Reptiles - Birds 0 -0.5029708 0.3582254 -1.4040625 5.917653e-01
tax_group Reptiles - Mammals 0 -3.5337846 0.5906877 -5.9824922 1.185021e-08

6.2.3 Plot

left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(singlem_fraction>0) %>% 
    ggplot(aes(y=damr, x=sample_type, color=sample_type, fill=sample_type, group=sample_type)) +
        geom_boxplot(outlier.shape = NA) +
        scale_color_manual(values = c("#bdca50", "#AA3377")) +   
        scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
        theme_classic() +
        labs(y="DAMR", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/damr_type.pdf",width=5, height=4, units="in")
left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(singlem_fraction>0) %>% 
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=damr, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="DAMR", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/damr_taxa.pdf",width=9, height=4, units="in")

6.3 Assemblies

6.3.1 Data summary

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(sample_type) %>%
    summarise(mean=mean(assembly_length, na.rm=T),
              sd=sd(assembly_length, na.rm=T),
              median=median(assembly_length, na.rm = TRUE),
              IQR=IQR(assembly_length, na.rm = TRUE)) %>% 
    tt()
tinytable_gu2t1phmanykipupnq4x
sample_type mean sd median IQR
Anal/cloacal swab 13208042 34504175 0 0
Faecal 95955306 83010036 97047664 151385731
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(tax_group) %>%
    summarise(mean=mean(assembly_length, na.rm=T),
              sd=sd(assembly_length, na.rm=T),
              median=median(assembly_length, na.rm = TRUE),
              IQR=IQR(assembly_length, na.rm = TRUE)) %>% 
    tt()
tinytable_jyluojijtxisf4guxjw1
tax_group mean sd median IQR
Amphibians 116614689 68762523 121907456 88263039
Bats 31069176 77144825 0 41984504
Birds 8688016 26409919 0 0
Mammals 96415766 69841084 98624268 75754888
Reptiles 139349280 74190452 143970512 93046444

6.3.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    lm(rank(assembly_length) ~ sample_type + tax_group, data = .)  %>%
    Anova(type="III") %>%
    tidy()%>%
    tt()
tinytable_ht0xbirxm21enxiga88y
term sumsq df statistic p.value
(Intercept) 33267819 1 314.2467 3.650912e-64
sample_type 13028568 1 123.0674 1.427942e-27
tax_group 100440135 4 237.1885 5.790079e-159
Residuals 163985333 1549 NA NA

6.3.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=assembly_length, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0,400000000)+
        geom_jitter(alpha = 0.3, width=0.3, size=0.5) +
        geom_violin() +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        facet_grid(~sample_type, scale="free") +
        labs(y="Assembly size", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/assemblysize_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=assembly_length, x=sample_type, group=sample_type)) +
          ylim(0,400000000)+
        stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
        theme_classic() +
        labs(y="Assembly size", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/assemblysize_taxa_all.pdf",width=9, height=4, units="in")

6.4 Number of MAGs

6.4.1 Summary statistics

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(sample_type) %>%
    summarise(mean=mean(assembly_num_bins, na.rm=T),
              sd=sd(assembly_num_bins, na.rm=T),
              median=median(assembly_num_bins, na.rm = TRUE),
              IQR=IQR(assembly_num_bins, na.rm = TRUE)) %>% 
    tt()
tinytable_bo1lkkdjid8dbwjo1gzt
sample_type mean sd median IQR
Anal/cloacal swab 2.644068 6.971768 0 0
Faecal 18.386792 16.099835 19 29
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(tax_group) %>%
    summarise(mean=mean(assembly_num_bins, na.rm=T),
              sd=sd(assembly_num_bins, na.rm=T),
              median=median(assembly_num_bins, na.rm = TRUE),
              IQR=IQR(assembly_num_bins, na.rm = TRUE)) %>% 
    tt()
tinytable_7eidtffc2jsfc7pqqzjx
tax_group mean sd median IQR
Amphibians 23.758389 14.944334 24 18
Bats 3.692308 7.451098 0 5
Birds 1.582979 5.600340 0 0
Mammals 22.062635 16.345908 23 19
Reptiles 23.924107 13.736366 24 18

6.4.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    glm(assembly_num_bins ~ sample_type + tax_group, data = .,family=quasipoisson)  %>%
    Anova(test.statistic = "F",type = "III") %>%
    tidy()%>%
    tt()
tinytable_i46hn44krcqq7bk5izhi
term sumsq df F.values p.value
sample_type 1781.293 1 131.4547 2.875375e-29
tax_group 9870.795 4 182.1096 5.599140e-128
Residuals 20989.917 1549 NA NA
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    mutate(sample_type=factor(sample_type),
           tax_group=factor(tax_group))%>%
    glm(assembly_num_bins ~ sample_type + tax_group, data = .,family=quasipoisson)  %>%
    glht(.,linfct = mcp(tax_group = "Tukey"))%>%
    summary()%>%
    tidy()%>%
    tt()
tinytable_1ypubwzxnhszim3to25f
term contrast null.value estimate std.error statistic adj.p.value
tax_group Bats - Amphibians 0 -1.89252007 0.13396130 -14.12736443 0.0000000
tax_group Birds - Amphibians 0 -2.37578946 0.20160147 -11.78458429 0.0000000
tax_group Mammals - Amphibians 0 -0.02494121 0.07183869 -0.34718348 0.9962960
tax_group Reptiles - Amphibians 0 -0.00222888 0.07136120 -0.03123378 0.9999997
tax_group Birds - Bats 0 -0.48326938 0.22581601 -2.14010238 0.1758987
tax_group Mammals - Bats 0 1.86757887 0.12433118 15.02100124 0.0000000
tax_group Reptiles - Bats 0 1.89029119 0.12401954 15.24188255 0.0000000
tax_group Mammals - Birds 0 2.35084825 0.19506107 12.05185782 0.0000000
tax_group Reptiles - Birds 0 2.37356058 0.19518653 12.16047329 0.0000000
tax_group Reptiles - Mammals 0 0.02271233 0.05098722 0.44545131 0.9903533

6.4.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=assembly_num_bins, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0,100) +
        geom_jitter(alpha = 0.6, width=0.3, size=0.5) +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        facet_grid(~sample_type, scale="free") +
        labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/numberofbins_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=assembly_num_bins, x=sample_type, group=sample_type)) +
        ylim(0,100) +
        stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
        theme_classic() +
        labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/numberofbins_taxa_all.pdf",width=9, height=4, units="in")

6.5 MAG quality

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=mag_completeness, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(50,100) +
        #geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="Number of bins", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/completeness_taxa.pdf",width=5, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=mag_contamination, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0,10) +
        #geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="Number of bins", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/contamination_taxa.pdf",width=5, height=4, units="in")

6.6 Assemblies vs MAGs

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual")%>%
    with(cor(assembly_num_bins,assembly_length))
[1] 0.8511555
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=assembly_num_bins, x=assembly_length, color=tax_group)) +
        geom_point(alpha=0.5, size=0.5) +
        scale_color_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        geom_smooth(method="lm",se=FALSE)+
        theme_classic() +
        labs(y="Number of bins", x="Assembly length", color="Taxa", fill="Taxa")